#include "Mesh.h"
#include "Element.h"
#include <iostream>
#include <string>
#include <cmath>
#include <fstream>

/// Eigen is linear library, providing sparse matrix, linear operator,
/// and linear solver.
#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
typedef Eigen::SparseMatrix<double> SpMat;
typedef Eigen::Triplet<double> Tri;
typedef Eigen::VectorXd RHS;
typedef Eigen::VectorXd Solution;

/** 
 * Real solution. Can be used as Dirichlet boundary conditions.
 * 
 * @param _p variable.
 * 
 * @return function value.
 */
double u(const Point<2, double> &_p);

/** 
 * Source term.
 * 
 * @param _p variable.
 * 
 * @return function value.
 */
double f(const Point<2, double> &_p);

void P1Element_Solver(size_t nx, size_t ny);

void P2Element_Solver(size_t nx, size_t ny);

int main(int argc, char *argv[])
{
	// P1Element_Solver(32, 32);

	int nx = 2;
	int ny = 2;
	if (argc > 2)
	{
		nx = atoi(argv[1]);
		ny = atoi(argv[2]);
	}
	// for (int n = 2; n < 3; n *= 2)
	P2Element_Solver(nx, ny);

	return 0;
};

double u(const Point<2, double> &_p)
{
	return std::sin(M_PI * _p[0]) * std::sin(2.0 * M_PI * _p[1]);
};

double f(const Point<2, double> &_p)
{
	return 5.0 * M_PI * M_PI * u(_p);
};

void P1Element_Solver(size_t nx, size_t ny)
{
	std::cout << std::endl
			  << "P1 Element Solver Start" << std::endl;
	/// Need a reference element. For this example, only one kind of
	/// element.
	P1Element<double> ref_ele;

	/// The algebraic accuracy for the quadrature on the element is 1.
	ref_ele.read_quad_info("data/triangle.tmp_geo", 1);
	/// Get the quadrature information from the reference element.
	int n_quad_pnts = ref_ele.get_n_quad_points();
	double volume = ref_ele.get_volume();

	Point<2, double> x0y0({0.0, 0.0});
	Point<2, double> x1y1({1.0, 1.0});
	double hx = (x1y1[0] - x0y0[0]) / nx;
	double hy = (x1y1[1] - x0y0[1]) / ny;

	size_t n_ele = nx * ny * 2;
	size_t n_dof = ref_ele.get_n_dofs();
	size_t n_total_dofs = (nx + 1) * (ny + 1);

	/// Here we use Eigen library for all linear structures and
	/// operators.
	// Matrix<double> A(n_total_dofs, n_total_dofs);
	// std::valarray<double> rhs(0.0, n_total_dofs); / Linear algebra
	// interface. To Eigen.
	SpMat K(n_total_dofs, n_total_dofs);
	RHS Rhs(n_total_dofs);
	std::vector<Tri> TriList(n_ele * n_dof * n_dof * n_quad_pnts);
	std::vector<Tri>::iterator it = TriList.begin();
	std::valarray<int> dof(3);
	/// Assembling ... For each element in the mesh.
	for (size_t ei = 0; ei < n_ele; ei++)
	{
		if (ei % 2 == 0)
		{
			size_t ix = (ei / 2) % nx;
			size_t iy = (ei / 2) / nx;
			Point<2, double> v0({ix * hx, iy * hy});
			Point<2, double> v1({(ix + 1) * hx, iy * hy});
			Point<2, double> v2({ix * hx, (iy + 1) * hy});

			dof[0] = iy * (nx + 1) + ix;
			dof[1] = iy * (nx + 1) + ix + 1;
			dof[2] = (iy + 1) * (nx + 1) + ix;

			v0.set_index(dof[0]);
			v1.set_index(dof[1]);
			v2.set_index(dof[2]);

			ref_ele.set_global_dof(0, v0);
			ref_ele.set_global_dof(1, v1);
			ref_ele.set_global_dof(2, v2);
		}
		else
		{
			size_t ix = ((ei - 1) / 2) % nx;
			size_t iy = ((ei - 1) / 2) / nx;
			Point<2, double> v0({(ix + 1) * hx, iy * hy});
			Point<2, double> v1({(ix + 1) * hx, (iy + 1) * hy});
			Point<2, double> v2({ix * hx, (iy + 1) * hy});

			dof[0] = iy * (nx + 1) + ix + 1;
			dof[1] = (iy + 1) * (nx + 1) + ix + 1;
			dof[2] = (iy + 1) * (nx + 1) + ix;

			v0.set_index(dof[0]);
			v1.set_index(dof[1]);
			v2.set_index(dof[2]);

			ref_ele.set_global_dof(0, v0);
			ref_ele.set_global_dof(1, v1);
			ref_ele.set_global_dof(2, v2);
		}
		/// Get the quadrature information from the reference element.
		const std::valarray<Point<2, double>> &quad_pnts = ref_ele.get_quad_points();
		const std::valarray<double> &weights = ref_ele.get_quad_weights();
		int n_quad_pnts = ref_ele.get_n_quad_points();
		double volume = ref_ele.get_volume();
		/// Begin to assemble ...
		for (size_t l = 0; l < n_quad_pnts; l++)
		{
			/// Jacobian information for the coordinates transformation.
			double detJ = ref_ele.global2local_Jacobi_det(quad_pnts[l]);
			Matrix<double> invJ = ref_ele.local2global_Jacobi(quad_pnts[l]);

			/// Build the entities of the coefficent matrix and right hand side vector.
			for (size_t i = 0; i < n_dof; i++)
			{
				std::valarray<double> grad_i = invJ * ref_ele.basis_gradient(i, quad_pnts[l]);
				double phi_i = ref_ele.basis_function(i, quad_pnts[l]);
				for (size_t j = 0; j < n_dof; j++)
				{
					std::valarray<double> grad_j = invJ * ref_ele.basis_gradient(j, quad_pnts[l]);
					/// a(grad phi_i, grad phi_j)
					double cont = (grad_i[0] * grad_j[0] + grad_i[1] * grad_j[1]) * weights[l] * detJ * volume;
					// A(dof[i], dof[j]) += cont;
					*it = Tri(dof[i], dof[j], cont);
					it++;
				}
				/// rhs(i) = \int f(x_i) phi_i dx
				// rhs[dof[i]] += f(ref_ele.local2global(quad_pnts[l])) * phi_i * weights[l] * detJ * volume;
				Rhs[dof[i]] += f(ref_ele.local2global(quad_pnts[l])) * phi_i * weights[l] * detJ * volume;
			}
		}
	}
	K.setFromTriplets(TriList.begin(), TriList.end());
	K.makeCompressed();

	for (size_t i = 0; i < n_total_dofs; i++)
	{
		size_t ix = i % (nx + 1);
		size_t iy = i / (nx + 1);
		if (ix == 0 || ix == nx || iy == 0 || iy == ny)
		{
			Point<2, double> global_dof({ix * hx, iy * hy});
			double boundary_value = u(global_dof);
			Rhs[i] = boundary_value * K.coeffRef(i, i);
			for (Eigen::SparseMatrix<double>::InnerIterator it(K, i); it; ++it)
			{
				size_t row = it.row();
				if (row == i)
					continue;
				Rhs[row] -= K.coeffRef(i, row) * boundary_value;
				K.coeffRef(i, row) = 0.0;
				K.coeffRef(row, i) = 0.0;
			}
		}
	};

	//    std::cout << K << std::endl;
	//    std::cout << Rhs << std::endl;

	Eigen::ConjugateGradient<Eigen::SparseMatrix<double>> Solver_sparse;
	Eigen::VectorXd x1_sparse;
	Solver_sparse.setTolerance(1e-15);
	Solver_sparse.compute(K);
	Solution solution = Solver_sparse.solve(Rhs);

	/// stupid error!
	double error = 0.0;
	for (size_t i = 0; i < n_total_dofs; i++)
	{
		size_t ix = i % (nx + 1);
		size_t iy = i / (nx + 1);
		Point<2, double> global_dof({ix * hx, iy * hy});

		error += (solution[i] - u(global_dof)) * (solution[i] - u(global_dof));
	}
	error = std::sqrt(error);
	std::cout << "error: " << error << std::endl;

	///L2error
	ref_ele.read_quad_info("data/triangle.tmp_geo", 4);
	error = 0.0;
	const std::valarray<Point<2, double>> &quad_pnts = ref_ele.get_quad_points();
	const std::valarray<double> &weights = ref_ele.get_quad_weights();
	n_quad_pnts = ref_ele.get_n_quad_points();
	for (size_t ei = 0; ei < n_ele; ei++)
	{
		if (ei % 2 == 0)
		{
			size_t ix = (ei / 2) % nx;
			size_t iy = (ei / 2) / nx;
			Point<2, double> v0({ix * hx, iy * hy});
			Point<2, double> v1({(ix + 1) * hx, iy * hy});
			Point<2, double> v2({ix * hx, (iy + 1) * hy});

			dof[0] = iy * (nx + 1) + ix;
			dof[1] = iy * (nx + 1) + ix + 1;
			dof[2] = (iy + 1) * (nx + 1) + ix;

			v0.set_index(dof[0]);
			v1.set_index(dof[1]);
			v2.set_index(dof[2]);

			ref_ele.set_global_dof(0, v0);
			ref_ele.set_global_dof(1, v1);
			ref_ele.set_global_dof(2, v2);
		}
		else
		{
			size_t ix = ((ei - 1) / 2) % nx;
			size_t iy = ((ei - 1) / 2) / nx;
			Point<2, double> v0({(ix + 1) * hx, iy * hy});
			Point<2, double> v1({(ix + 1) * hx, (iy + 1) * hy});
			Point<2, double> v2({ix * hx, (iy + 1) * hy});

			dof[0] = iy * (nx + 1) + ix + 1;
			dof[1] = (iy + 1) * (nx + 1) + ix + 1;
			dof[2] = (iy + 1) * (nx + 1) + ix;

			v0.set_index(dof[0]);
			v1.set_index(dof[1]);
			v2.set_index(dof[2]);

			ref_ele.set_global_dof(0, v0);
			ref_ele.set_global_dof(1, v1);
			ref_ele.set_global_dof(2, v2);
		}

		/// Get the quadrature information from the reference element.
		const std::valarray<Point<2, double>> &quad_pnts = ref_ele.get_quad_points();
		const std::valarray<double> &weights = ref_ele.get_quad_weights();
		int n_quad_pnts = ref_ele.get_n_quad_points();
		double volume = ref_ele.get_volume();
		/// Begin to assemble ...
		for (size_t l = 0; l < n_quad_pnts; l++)
		{
			///
			double detJ = ref_ele.global2local_Jacobi_det(quad_pnts[l]);
			double u_sol = 0;
			for (size_t i = 0; i < n_dof; i++)
			{
				double phi_i = ref_ele.basis_function(i, quad_pnts[l]);
				u_sol += phi_i * solution[dof[i]];
			}
			error += (u(ref_ele.local2global(quad_pnts[l])) - u_sol) * (u(ref_ele.local2global(quad_pnts[l])) - u_sol) * weights[l] * detJ * volume;
		}
	}
	error = std::sqrt(error);
	std::cout << "L2 error: " << error << std::endl;

	/// Output the solution to file in OpenDx format.
	std::ofstream output("u.dx");
	output << "object 1 class array type float rank 1 shape 2 item "
		   << n_total_dofs
		   << " data follows" << std::endl;
	output.setf(std::ios::fixed);
	output.precision(20);
	for (size_t i = 0; i < n_total_dofs; i++)
	{
		size_t ix = i % (nx + 1);
		size_t iy = i / (nx + 1);
		output << ix * hx << "\t" << iy * hy << std::endl;
	}

	output << std::endl;
	output << "object 2 class array type int rank 1 shape 3 item "
		   << n_ele << " data follows" << std::endl;

	for (size_t ei = 0; ei < n_ele; ei++)
	{
		if (ei % 2 == 0)
		{
			size_t ix = (ei / 2) % nx;
			size_t iy = (ei / 2) / nx;
			output << iy * (nx + 1) + ix << "\t"
				   << iy * (nx + 1) + ix + 1 << "\t"
				   << (iy + 1) * (nx + 1) + ix << std::endl;
		}
		else
		{
			size_t ix = ((ei - 1) / 2) % nx;
			size_t iy = ((ei - 1) / 2) / nx;
			output << iy * (nx + 1) + ix + 1 << "\t"
				   << (iy + 1) * (nx + 1) + ix + 1 << "\t"
				   << (iy + 1) * (nx + 1) + ix << std::endl;
		}
	}
	output << "attribute \"element type\" string \"triangles\"" << std::endl;
	output << "attribute \"ref\" string \"positions\"" << std::endl;
	output << std::endl;
	output << "object 3 class array type float rank 0 item "
		   << n_total_dofs
		   << " data follows" << std::endl;
	for (size_t i = 0; i < n_total_dofs; i++)
		output << solution[i] << std::endl;

	output << "attribute \"dep\" string \"positions\"" << std::endl;
	output << std::endl;
	output << "object \"FEMFunction-2d\" class field" << std::endl;
	output << "component \"positions\" value 1" << std::endl;
	output << "component \"connections\" value 2" << std::endl;
	output << "component \"data\" value 3" << std::endl;
	output << "end" << std::endl;

	output.close();
	std::cout << "P1 Element Solver Finished" << std::endl;
}

void P2Element_Solver(size_t nx, size_t ny)
{
	std::cout << std::endl
			  << "P2 Element Solver Start" << std::endl;

	///P2 Element
	/// Need a reference element. For this example, only one kind of
	/// element.
	P2Element<double> ref_ele;

	/// The algebraic accuracy for the quadrature on the element is 2.
	ref_ele.read_quad_info("data/triangle.tmp_geo", 2);
	/// Get the quadrature information from the reference element.
	int n_quad_pnts = ref_ele.get_n_quad_points();
	double volume = ref_ele.get_volume();

	Point<2, double> x0y0({0.0, 0.0});
	Point<2, double> x1y1({1.0, 1.0});
	double hx = (x1y1[0] - x0y0[0]) / nx;
	double hy = (x1y1[1] - x0y0[1]) / ny;

	size_t n_ele = nx * ny * 2;
	size_t n_dof = ref_ele.get_n_dofs();
	size_t n_total_dofs = (2 * nx + 1) * (2 * ny + 1); //直线上加密一倍（只在完全由矩形分割的时候有效）

	/// Here we use Eigen library for all linear structures and
	/// operators.
	// Matrix<double> A(n_total_dofs, n_total_dofs);
	// std::valarray<double> rhs(0.0, n_total_dofs); / Linear algebra
	// interface. To Eigen.
	SpMat K(n_total_dofs, n_total_dofs);
	RHS Rhs(n_total_dofs);
	std::vector<Tri> TriList(n_ele * n_dof * n_dof * n_quad_pnts);
	std::vector<Tri>::iterator it = TriList.begin();
	std::valarray<int> dof(6);
	/// Assembling ... For each element in the mesh.
	for (size_t ei = 0; ei < n_ele; ei++)
	{
		if (ei % 2 == 0)
		{
			size_t ix = (ei / 2) % nx;
			size_t iy = (ei / 2) / nx;
			Point<2, double> v0({ix * hx, iy * hy});
			Point<2, double> v1({(ix + 1) * hx, iy * hy});
			Point<2, double> v2({ix * hx, (iy + 1) * hy});
			Point<2, double> v3({(ix + 0.5) * hx, (iy + 0.5) * hy}); //讲道理应该有中点计算的但是它好像不能直接当成valarray来直接计算就懒得写了
			Point<2, double> v4({ix * hx, (iy + 0.5) * hy});
			Point<2, double> v5({(ix + 0.5) * hx, iy * hy});

			dof[0] = 2 * iy * (2 * nx + 1) + 2 * ix;
			dof[1] = 2 * iy * (2 * nx + 1) + 2 * (ix + 1);
			dof[2] = 2 * (iy + 1) * (2 * nx + 1) + 2 * ix;
			dof[3] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 0.5);
			dof[4] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * ix;
			dof[5] = 2 * iy * (2 * nx + 1) + 2 * (ix + 0.5);

			v0.set_index(dof[0]);
			v1.set_index(dof[1]);
			v2.set_index(dof[2]);
			v3.set_index(dof[3]);
			v4.set_index(dof[4]);
			v5.set_index(dof[5]);

			ref_ele.set_global_dof(0, v0);
			ref_ele.set_global_dof(1, v1);
			ref_ele.set_global_dof(2, v2);
			ref_ele.set_global_dof(3, v3);
			ref_ele.set_global_dof(4, v4);
			ref_ele.set_global_dof(5, v5);
		}
		else
		{
			size_t ix = ((ei - 1) / 2) % nx;
			size_t iy = ((ei - 1) / 2) / nx;
			Point<2, double> v0({(ix + 1) * hx, iy * hy});
			Point<2, double> v1({(ix + 1) * hx, (iy + 1) * hy});
			Point<2, double> v2({ix * hx, (iy + 1) * hy});
			Point<2, double> v3({(ix + 0.5) * hx, (iy + 1) * hy});
			Point<2, double> v4({(ix + 0.5) * hx, (iy + 0.5) * hy});
			Point<2, double> v5({(ix + 1) * hx, (iy + 0.5) * hy});

			dof[0] = 2 * iy * (2 * nx + 1) + 2 * (ix + 1);
			dof[1] = 2 * (iy + 1) * (2 * nx + 1) + 2 * (ix + 1);
			dof[2] = 2 * (iy + 1) * (2 * nx + 1) + 2 * ix;
			dof[3] = 2 * (iy + 1) * (2 * nx + 1) + 2 * (ix + 0.5);
			dof[4] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 0.5);
			dof[5] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 1);

			v0.set_index(dof[0]);
			v1.set_index(dof[1]);
			v2.set_index(dof[2]);
			v3.set_index(dof[3]);
			v4.set_index(dof[4]);
			v5.set_index(dof[5]);

			ref_ele.set_global_dof(0, v0);
			ref_ele.set_global_dof(1, v1);
			ref_ele.set_global_dof(2, v2);
			ref_ele.set_global_dof(3, v3);
			ref_ele.set_global_dof(4, v4);
			ref_ele.set_global_dof(5, v5);
		}
		/// Get the quadrature information from the reference element.
		const std::valarray<Point<2, double>> &quad_pnts = ref_ele.get_quad_points();
		const std::valarray<double> &weights = ref_ele.get_quad_weights();

		/// Begin to assemble ...
		for (size_t l = 0; l < n_quad_pnts; l++)
		{
			/// Jacobian information for the coordinates transformation.
			double detJ = ref_ele.global2local_Jacobi_det(quad_pnts[l]);
			Matrix<double> invJ = ref_ele.local2global_Jacobi(quad_pnts[l]);

			/// Build the entities of the coefficent matrix and right hand side vector.
			for (size_t i = 0; i < n_dof; i++)
			{
				std::valarray<double> grad_i = invJ * ref_ele.basis_gradient(i, quad_pnts[l]);
				double phi_i = ref_ele.basis_function(i, quad_pnts[l]);
				for (size_t j = 0; j < n_dof; j++)
				{
					std::valarray<double> grad_j = invJ * ref_ele.basis_gradient(j, quad_pnts[l]);
					/// a(grad phi_i, grad phi_j)
					double cont = (grad_i[0] * grad_j[0] + grad_i[1] * grad_j[1]) * weights[l] * detJ * volume;
					// A(dof[i], dof[j]) += cont;
					*it = Tri(dof[i], dof[j], cont);
					it++;
				}
				/// rhs(i) = \int f(x_i) phi_i dx
				// rhs[dof[i]] += f(ref_ele.local2global(quad_pnts[l])) * phi_i * weights[l] * detJ * volume;
				Rhs[dof[i]] += f(ref_ele.local2global(quad_pnts[l])) * phi_i * weights[l] * detJ * volume;
			}
		}
	}
	K.setFromTriplets(TriList.begin(), TriList.end());
	K.makeCompressed();

	for (size_t i = 0; i < n_total_dofs; i++)
	{
		size_t ix = i % (2 * nx + 1);
		size_t iy = i / (2 * nx + 1);

		if (ix == 0 || ix == 2 * nx || iy == 0 || iy == 2 * ny)
		{
			Point<2, double> global_dof({ix * hx / 2., iy * hy / 2.});
			double boundary_value = u(global_dof);
			Rhs[i] = boundary_value * K.coeffRef(i, i);
			for (Eigen::SparseMatrix<double>::InnerIterator it(K, i); it; ++it)
			{
				size_t row = it.row();
				if (row == i)
					continue;
				Rhs[row] -= K.coeffRef(i, row) * boundary_value;
				K.coeffRef(i, row) = 0.0;
				K.coeffRef(row, i) = 0.0;
			}
		}
	};

	Eigen::ConjugateGradient<Eigen::SparseMatrix<double>> Solver_sparse;
	Eigen::VectorXd x1_sparse;
	Solver_sparse.setTolerance(1e-15);
	Solver_sparse.compute(K);
	Solution solution = Solver_sparse.solve(Rhs);

	/// stupid error!
	double error = 0.0;
	for (size_t i = 0; i < n_total_dofs; i++)
	{
		size_t ix = i % (2 * nx + 1);
		size_t iy = i / (2 * nx + 1);
		Point<2, double> global_dof({ix * hx / 2, iy * hy / 2});

		error += (solution[i] - u(global_dof)) * (solution[i] - u(global_dof));
	}
	error = std::sqrt(error);
	std::cout << "error: " << error << std::endl;

	///L2error
	ref_ele.read_quad_info("data/triangle.tmp_geo", 6);
	error = 0.0;
	const std::valarray<Point<2, double>> &quad_pnts = ref_ele.get_quad_points();
	const std::valarray<double> &weights = ref_ele.get_quad_weights();
	n_quad_pnts = ref_ele.get_n_quad_points();
	for (size_t ei = 0; ei < n_ele; ei++)
	{
		if (ei % 2 == 0)
		{
			size_t ix = (ei / 2) % nx;
			size_t iy = (ei / 2) / nx;
			Point<2, double> v0({ix * hx, iy * hy});
			Point<2, double> v1({(ix + 1) * hx, iy * hy});
			Point<2, double> v2({ix * hx, (iy + 1) * hy});
			Point<2, double> v3 = mid_point<2, double>(v1, v2);
			Point<2, double> v4 = mid_point<2, double>(v0, v2);
			Point<2, double> v5 = mid_point<2, double>(v1, v0);

			dof[0] = 2 * iy * (2 * nx + 1) + 2 * ix;
			dof[1] = 2 * iy * (2 * nx + 1) + 2 * (ix + 1);
			dof[2] = 2 * (iy + 1) * (2 * nx + 1) + 2 * ix;
			dof[3] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 0.5);
			dof[4] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * ix;
			dof[5] = 2 * iy * (2 * nx + 1) + 2 * (ix + 0.5);

			v0.set_index(dof[0]);
			v1.set_index(dof[1]);
			v2.set_index(dof[2]);
			v3.set_index(dof[3]);
			v4.set_index(dof[4]);
			v5.set_index(dof[5]);

			ref_ele.set_global_dof(0, v0);
			ref_ele.set_global_dof(1, v1);
			ref_ele.set_global_dof(2, v2);
			ref_ele.set_global_dof(3, v3);
			ref_ele.set_global_dof(4, v4);
			ref_ele.set_global_dof(5, v5);
		}
		else
		{
			size_t ix = ((ei - 1) / 2) % nx;
			size_t iy = ((ei - 1) / 2) / nx;
			Point<2, double> v0({(ix + 1) * hx, iy * hy});
			Point<2, double> v1({(ix + 1) * hx, (iy + 1) * hy});
			Point<2, double> v2({ix * hx, (iy + 1) * hy});
			Point<2, double> v3 = mid_point<2, double>(v1, v2);
			Point<2, double> v4 = mid_point<2, double>(v0, v2);
			Point<2, double> v5 = mid_point<2, double>(v1, v0);
			// Point<2, double> v3({(ix + 0.5) * hx, (iy + 1) * hy}); 留给debug
			// Point<2, double> v4({(ix + 0.5) * hx, (iy + 0.5) * hy});
			// Point<2, double> v5({(ix + 1) * hx, (iy + 0.5) * hy});

			dof[0] = 2 * iy * (2 * nx + 1) + 2 * (ix + 1);
			dof[1] = 2 * (iy + 1) * (2 * nx + 1) + 2 * (ix + 1);
			dof[2] = 2 * (iy + 1) * (2 * nx + 1) + 2 * ix;
			dof[3] = 2 * (iy + 1) * (2 * nx + 1) + 2 * (ix + 0.5);
			dof[4] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 0.5);
			dof[5] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 1);

			v0.set_index(dof[0]);
			v1.set_index(dof[1]);
			v2.set_index(dof[2]);
			v3.set_index(dof[3]);
			v4.set_index(dof[4]);
			v5.set_index(dof[5]);

			ref_ele.set_global_dof(0, v0);
			ref_ele.set_global_dof(1, v1);
			ref_ele.set_global_dof(2, v2);
			ref_ele.set_global_dof(3, v3);
			ref_ele.set_global_dof(4, v4);
			ref_ele.set_global_dof(5, v5);
		}

		/// Begin to assemble ...
		for (size_t l = 0; l < n_quad_pnts; l++)
		{
			///
			double detJ = ref_ele.global2local_Jacobi_det(quad_pnts[l]);
			double u_sol = 0;
			for (size_t i = 0; i < n_dof; i++)
			{
				double phi_i = ref_ele.basis_function(i, quad_pnts[l]);
				u_sol += phi_i * solution[dof[i]];
			}
			error += (u(ref_ele.local2global(quad_pnts[l])) - u_sol) * (u(ref_ele.local2global(quad_pnts[l])) - u_sol) * weights[l] * detJ * volume;
		}
	}
	error = std::sqrt(error);
	std::cout << "L2 error: " << error << std::endl;

	/// Output the solution to file in Matlab format.
	std::ofstream output("u2.m");
	int plot_multiple = 5;
	output << "x=linspace(" << x0y0[0] << "," << x1y1[0] << "," << plot_multiple * nx + 1 << ");" << std::endl;
	output << "y=linspace(" << x0y0[1] << "," << x1y1[1] << "," << plot_multiple * ny + 1 << ");" << std::endl;
	output << "z=zeros(" << plot_multiple * ny + 1 << "," << plot_multiple * nx + 1 << ");" << std::endl;

	for (size_t ei = 0; ei < n_ele; ei++)
	{
		if (ei % 2 == 0)
		{
			size_t ix = (ei / 2) % nx;
			size_t iy = (ei / 2) / nx;
			Point<2, double> v0({ix * hx, iy * hy});
			Point<2, double> v1({(ix + 1) * hx, iy * hy});
			Point<2, double> v2({ix * hx, (iy + 1) * hy});
			Point<2, double> v3 = mid_point<2, double>(v1, v2);
			Point<2, double> v4 = mid_point<2, double>(v0, v2);
			Point<2, double> v5 = mid_point<2, double>(v1, v0);

			dof[0] = 2 * iy * (2 * nx + 1) + 2 * ix;
			dof[1] = 2 * iy * (2 * nx + 1) + 2 * (ix + 1);
			dof[2] = 2 * (iy + 1) * (2 * nx + 1) + 2 * ix;
			dof[3] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 0.5);
			dof[4] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * ix;
			dof[5] = 2 * iy * (2 * nx + 1) + 2 * (ix + 0.5);

			v0.set_index(dof[0]);
			v1.set_index(dof[1]);
			v2.set_index(dof[2]);
			v3.set_index(dof[3]);
			v4.set_index(dof[4]);
			v5.set_index(dof[5]);

			ref_ele.set_global_dof(0, v0);
			ref_ele.set_global_dof(1, v1);
			ref_ele.set_global_dof(2, v2);
			ref_ele.set_global_dof(3, v3);
			ref_ele.set_global_dof(4, v4);
			ref_ele.set_global_dof(5, v5);

			for (int iix = 0; iix < plot_multiple; iix++)
				for (int iiy = 0; iiy <= plot_multiple - iix; iiy++)
				{
					Point<2, double> global_pnt({(ix + 1. * iix / plot_multiple) * hx, (iy + 1. * iiy / plot_multiple) * hy});
					Point<2, double> local_pnt = ref_ele.global2local(global_pnt);
					double u_sol = 0;
					for (size_t i = 0; i < n_dof; i++)
					{
						double phi_i = ref_ele.basis_function(i, local_pnt);
						u_sol += phi_i * solution[dof[i]];
					}
					output << "z(" << iy * plot_multiple + iiy + 1 << "," << ix * plot_multiple + iix + 1 << ")=" << u_sol << ";" << std::endl;
				}
		}
		else
		{
			size_t ix = ((ei - 1) / 2) % nx;
			size_t iy = ((ei - 1) / 2) / nx;
			Point<2, double> v0({(ix + 1) * hx, iy * hy});
			Point<2, double> v1({(ix + 1) * hx, (iy + 1) * hy});
			Point<2, double> v2({ix * hx, (iy + 1) * hy});
			Point<2, double> v3 = mid_point<2, double>(v1, v2);
			Point<2, double> v4 = mid_point<2, double>(v0, v2);
			Point<2, double> v5 = mid_point<2, double>(v1, v0);

			dof[0] = 2 * iy * (2 * nx + 1) + 2 * (ix + 1);
			dof[1] = 2 * (iy + 1) * (2 * nx + 1) + 2 * (ix + 1);
			dof[2] = 2 * (iy + 1) * (2 * nx + 1) + 2 * ix;
			dof[3] = 2 * (iy + 1) * (2 * nx + 1) + 2 * (ix + 0.5);
			dof[4] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 0.5);
			dof[5] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 1);

			ref_ele.set_global_dof(0, v0);
			ref_ele.set_global_dof(1, v1);
			ref_ele.set_global_dof(2, v2);
			ref_ele.set_global_dof(3, v3);
			ref_ele.set_global_dof(4, v4);
			ref_ele.set_global_dof(5, v5);

			for (int iix = 0; iix < plot_multiple; iix++)
				for (int iiy = plot_multiple - iix; iiy < plot_multiple; iiy++)
				{
					Point<2, double> global_pnt({(ix + 1. * iix / plot_multiple) * hx, (iy + 1. * iiy / plot_multiple) * hy});
					Point<2, double> local_pnt = ref_ele.global2local(global_pnt);
					double u_sol = 0;
					for (size_t i = 0; i < n_dof; i++)
					{
						double phi_i = ref_ele.basis_function(i, local_pnt);
						u_sol += phi_i * solution[dof[i]];
					}
					output << "z(" << iy * plot_multiple + iiy + 1 << "," << ix * plot_multiple + iix + 1 << ")=" << u_sol << ";" << std::endl;
				}
		}
	}
	// output << "surf(x,y,z);" << std::endl;
	// output << "shading interp" << std::endl;
	output << "[x,y]=meshgrid(x,y);" << std::endl;
	output << " T = delaunay(x,y);" << std::endl;
	output << "trisurf(T,x,y,z);" << std::endl;
	output.close();

	/// Output the solution to file in OpenDx format.
	output.open("u2.dx");
	plot_multiple = 5;
	nx *= plot_multiple;
	ny *= plot_multiple;
	size_t n_node = (nx + 1) * (ny + 1);
	n_ele = nx * ny * 2;

	output << "object 1 class array type float rank 1 shape 2 item "
		   << n_node
		   << " data follows" << std::endl;
	output.setf(std::ios::fixed);

	output.precision(20);
	for (size_t ni = 0; ni < n_node; ni++)
	{
		size_t ix = (ni) % (nx + 1);
		size_t iy = (ni) / (nx + 1);
		output << ix * hx / plot_multiple << "\t" << iy * hy / plot_multiple << std::endl;
	}

	output << std::endl;
	output << "object 2 class array type int rank 1 shape 3 item " << n_ele << " data follows" << std::endl;

	for (size_t ei = 0; ei < n_ele; ei++)
	{
		size_t node[3];
		if (ei % 2 == 0)
		{
			size_t ix = (ei / 2) % nx;
			size_t iy = (ei / 2) / nx;

			node[0] = iy * (nx + 1) + ix;
			node[1] = iy * (nx + 1) + ix + 1;
			node[2] = (iy + 1) * (nx + 1) + ix;
		}
		else
		{
			size_t ix = ((ei - 1) / 2) % nx;
			size_t iy = ((ei - 1) / 2) / nx;

			node[0] = iy * (nx + 1) + ix + 1;
			node[1] = (iy + 1) * (nx + 1) + ix + 1;
			node[2] = (iy + 1) * (nx + 1) + ix;
		}
		output << node[0] << "\t" << node[1] << "\t" << node[2] << std::endl;
	}
	output << "attribute \"element type\" string \"triangles\"" << std::endl;
	output << "attribute \"ref\" string \"positions\"" << std::endl;
	output << std::endl;
	output << "object 3 class array type float rank 0 item "
		   << n_node
		   << " data follows" << std::endl;
	nx /= plot_multiple;
	ny /= plot_multiple;

	size_t last_ei = 999;
	for (size_t ni = 0; ni < n_node; ni++)
	{
		size_t ix = (ni) % (plot_multiple * nx + 1); //index of node
		size_t iy = (ni) / (plot_multiple * nx + 1);
		Point<2, double> global_pnt({ix * hx / plot_multiple, iy * hy / plot_multiple});
		int is_upper = (ix % plot_multiple + iy % plot_multiple) > plot_multiple; //wheater node in the upper triangle
		ix /= plot_multiple;													  //index of element
		iy /= plot_multiple;
		if (ix == nx)
		{
			ix--;
			is_upper = 1;
		}
		if (iy == ny)
		{
			iy--;
			is_upper = 1;
		}

		size_t ei = (2 * ix) + iy * (2 * nx) + is_upper; //right boundary,upper boundary
		if (ei != last_ei)
			if (is_upper == 0)
			{
				Point<2, double> v0({ix * hx, iy * hy});
				Point<2, double> v1({(ix + 1) * hx, iy * hy});
				Point<2, double> v2({ix * hx, (iy + 1) * hy});
				Point<2, double> v3 = mid_point<2, double>(v1, v2);
				Point<2, double> v4 = mid_point<2, double>(v0, v2);
				Point<2, double> v5 = mid_point<2, double>(v1, v0);

				dof[0] = 2 * iy * (2 * nx + 1) + 2 * ix;
				dof[1] = 2 * iy * (2 * nx + 1) + 2 * (ix + 1);
				dof[2] = 2 * (iy + 1) * (2 * nx + 1) + 2 * ix;
				dof[3] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 0.5);
				dof[4] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * ix;
				dof[5] = 2 * iy * (2 * nx + 1) + 2 * (ix + 0.5);

				v0.set_index(dof[0]);
				v1.set_index(dof[1]);
				v2.set_index(dof[2]);
				v3.set_index(dof[3]);
				v4.set_index(dof[4]);
				v5.set_index(dof[5]);

				ref_ele.set_global_dof(0, v0);
				ref_ele.set_global_dof(1, v1);
				ref_ele.set_global_dof(2, v2);
				ref_ele.set_global_dof(3, v3);
				ref_ele.set_global_dof(4, v4);
				ref_ele.set_global_dof(5, v5);
			}
			else
			{
				Point<2, double> v0({(ix + 1) * hx, iy * hy});
				Point<2, double> v1({(ix + 1) * hx, (iy + 1) * hy});
				Point<2, double> v2({ix * hx, (iy + 1) * hy});
				Point<2, double> v3 = mid_point<2, double>(v1, v2);
				Point<2, double> v4 = mid_point<2, double>(v0, v2);
				Point<2, double> v5 = mid_point<2, double>(v1, v0);

				dof[0] = 2 * iy * (2 * nx + 1) + 2 * (ix + 1);
				dof[1] = 2 * (iy + 1) * (2 * nx + 1) + 2 * (ix + 1);
				dof[2] = 2 * (iy + 1) * (2 * nx + 1) + 2 * ix;
				dof[3] = 2 * (iy + 1) * (2 * nx + 1) + 2 * (ix + 0.5);
				dof[4] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 0.5);
				dof[5] = 2 * (iy + 0.5) * (2 * nx + 1) + 2 * (ix + 1);

				ref_ele.set_global_dof(0, v0);
				ref_ele.set_global_dof(1, v1);
				ref_ele.set_global_dof(2, v2);
				ref_ele.set_global_dof(3, v3);
				ref_ele.set_global_dof(4, v4);
				ref_ele.set_global_dof(5, v5);
			}
		Point<2, double> local_pnt = ref_ele.global2local(global_pnt);
		double u_sol = 0;
		for (size_t i = 0; i < n_dof; i++)
		{
			double phi_i = ref_ele.basis_function(i, local_pnt);
			u_sol += phi_i * solution[dof[i]];
		}
		output << u_sol << std::endl;
		last_ei = ei;
	}

	output << "attribute \"dep\" string \"positions\"" << std::endl;
	output << std::endl;
	output << "object \"FEMFunction-2d\" class field" << std::endl;
	output << "component \"positions\" value 1" << std::endl;
	output << "component \"connections\" value 2" << std::endl;
	output << "component \"data\" value 3" << std::endl;
	output << "end" << std::endl;
	output.close();

	std::cout << "P2 Element Solver Finished" << std::endl;
}